{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 12.8 \u7b80\u5355\u7684\u5e76\u884c\u7f16\u7a0b\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "### \u95ee\u9898\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u4f60\u6709\u4e2a\u7a0b\u5e8f\u8981\u6267\u884cCPU\u5bc6\u96c6\u578b\u5de5\u4f5c\uff0c\u4f60\u60f3\u8ba9\u4ed6\u5229\u7528\u591a\u6838CPU\u7684\u4f18\u52bf\u6765\u8fd0\u884c\u7684\u5feb\u4e00\u70b9\u3002"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "### \u89e3\u51b3\u65b9\u6848\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "concurrent.futures \u5e93\u63d0\u4f9b\u4e86\u4e00\u4e2a ProcessPoolExecutor \u7c7b\uff0c\n\u53ef\u88ab\u7528\u6765\u5728\u4e00\u4e2a\u5355\u72ec\u7684Python\u89e3\u91ca\u5668\u4e2d\u6267\u884c\u8ba1\u7b97\u5bc6\u96c6\u578b\u51fd\u6570\u3002\n\u4e0d\u8fc7\uff0c\u8981\u4f7f\u7528\u5b83\uff0c\u4f60\u9996\u5148\u8981\u6709\u4e00\u4e9b\u8ba1\u7b97\u5bc6\u96c6\u578b\u7684\u4efb\u52a1\u3002\n\u6211\u4eec\u901a\u8fc7\u4e00\u4e2a\u7b80\u5355\u800c\u5b9e\u9645\u7684\u4f8b\u5b50\u6765\u6f14\u793a\u5b83\u3002\u5047\u5b9a\u4f60\u6709\u4e2aApache web\u670d\u52a1\u5668\u65e5\u5fd7\u76ee\u5f55\u7684gzip\u538b\u7f29\u5305\uff1a"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "logs/\n   20120701.log.gz\n   20120702.log.gz\n   20120703.log.gz\n   20120704.log.gz\n   20120705.log.gz\n   20120706.log.gz\n   ..."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u8fdb\u4e00\u6b65\u5047\u8bbe\u6bcf\u4e2a\u65e5\u5fd7\u6587\u4ef6\u5185\u5bb9\u7c7b\u4f3c\u4e0b\u9762\u8fd9\u6837\uff1a"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "124.115.6.12 - - [10/Jul/2012:00:18:50 -0500] \"GET /robots.txt ...\" 200 71\n210.212.209.67 - - [10/Jul/2012:00:18:51 -0500] \"GET /ply/ ...\" 200 11875\n210.212.209.67 - - [10/Jul/2012:00:18:51 -0500] \"GET /favicon.ico ...\" 404 369\n61.135.216.105 - - [10/Jul/2012:00:20:04 -0500] \"GET /blog/atom.xml ...\" 304 -\n..."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u4e0b\u9762\u662f\u4e00\u4e2a\u811a\u672c\uff0c\u5728\u8fd9\u4e9b\u65e5\u5fd7\u6587\u4ef6\u4e2d\u67e5\u627e\u51fa\u6240\u6709\u8bbf\u95ee\u8fc7robots.txt\u6587\u4ef6\u7684\u4e3b\u673a\uff1a"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# findrobots.py\n\nimport gzip\nimport io\nimport glob\n\ndef find_robots(filename):\n    '''\n    Find all of the hosts that access robots.txt in a single log file\n    '''\n    robots = set()\n    with gzip.open(filename) as f:\n        for line in io.TextIOWrapper(f,encoding='ascii'):\n            fields = line.split()\n            if fields[6] == '/robots.txt':\n                robots.add(fields[0])\n    return robots\n\ndef find_all_robots(logdir):\n    '''\n    Find all hosts across and entire sequence of files\n    '''\n    files = glob.glob(logdir+'/*.log.gz')\n    all_robots = set()\n    for robots in map(find_robots, files):\n        all_robots.update(robots)\n    return all_robots\n\nif __name__ == '__main__':\n    robots = find_all_robots('logs')\n    for ipaddr in robots:\n        print(ipaddr)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u524d\u9762\u7684\u7a0b\u5e8f\u4f7f\u7528\u4e86\u901a\u5e38\u7684map-reduce\u98ce\u683c\u6765\u7f16\u5199\u3002\n\u51fd\u6570 find_robots() \u5728\u4e00\u4e2a\u6587\u4ef6\u540d\u96c6\u5408\u4e0a\u505amap\u64cd\u4f5c\uff0c\u5e76\u5c06\u7ed3\u679c\u6c47\u603b\u4e3a\u4e00\u4e2a\u5355\u72ec\u7684\u7ed3\u679c\uff0c\n\u4e5f\u5c31\u662f find_all_robots() \u51fd\u6570\u4e2d\u7684 all_robots \u96c6\u5408\u3002\n\u73b0\u5728\uff0c\u5047\u8bbe\u4f60\u60f3\u8981\u4fee\u6539\u8fd9\u4e2a\u7a0b\u5e8f\u8ba9\u5b83\u4f7f\u7528\u591a\u6838CPU\u3002\n\u5f88\u7b80\u5355\u2014\u2014\u53ea\u9700\u8981\u5c06map()\u64cd\u4f5c\u66ff\u6362\u4e3a\u4e00\u4e2a concurrent.futures \u5e93\u4e2d\u751f\u6210\u7684\u7c7b\u4f3c\u64cd\u4f5c\u5373\u53ef\u3002\n\u4e0b\u9762\u662f\u4e00\u4e2a\u7b80\u5355\u4fee\u6539\u7248\u672c\uff1a"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# findrobots.py\n\nimport gzip\nimport io\nimport glob\nfrom concurrent import futures\n\ndef find_robots(filename):\n    '''\n    Find all of the hosts that access robots.txt in a single log file\n\n    '''\n    robots = set()\n    with gzip.open(filename) as f:\n        for line in io.TextIOWrapper(f,encoding='ascii'):\n            fields = line.split()\n            if fields[6] == '/robots.txt':\n                robots.add(fields[0])\n    return robots\n\ndef find_all_robots(logdir):\n    '''\n    Find all hosts across and entire sequence of files\n    '''\n    files = glob.glob(logdir+'/*.log.gz')\n    all_robots = set()\n    with futures.ProcessPoolExecutor() as pool:\n        for robots in pool.map(find_robots, files):\n            all_robots.update(robots)\n    return all_robots\n\nif __name__ == '__main__':\n    robots = find_all_robots('logs')\n    for ipaddr in robots:\n        print(ipaddr)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u901a\u8fc7\u8fd9\u4e2a\u4fee\u6539\u540e\uff0c\u8fd0\u884c\u8fd9\u4e2a\u811a\u672c\u4ea7\u751f\u540c\u6837\u7684\u7ed3\u679c\uff0c\u4f46\u662f\u5728\u56db\u6838\u673a\u5668\u4e0a\u9762\u6bd4\u4e4b\u524d\u5feb\u4e863.5\u500d\u3002\n\u5b9e\u9645\u7684\u6027\u80fd\u4f18\u5316\u6548\u679c\u6839\u636e\u4f60\u7684\u673a\u5668CPU\u6570\u91cf\u7684\u4e0d\u540c\u800c\u4e0d\u540c\u3002"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "### \u8ba8\u8bba\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "ProcessPoolExecutor \u7684\u5178\u578b\u7528\u6cd5\u5982\u4e0b\uff1a"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from concurrent.futures import ProcessPoolExecutor\n\nwith ProcessPoolExecutor() as pool:\n    ...\n    do work in parallel using pool\n    ..."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u5176\u539f\u7406\u662f\uff0c\u4e00\u4e2a ProcessPoolExecutor \u521b\u5efaN\u4e2a\u72ec\u7acb\u7684Python\u89e3\u91ca\u5668\uff0c\nN\u662f\u7cfb\u7edf\u4e0a\u9762\u53ef\u7528CPU\u7684\u4e2a\u6570\u3002\u4f60\u53ef\u4ee5\u901a\u8fc7\u63d0\u4f9b\u53ef\u9009\u53c2\u6570\u7ed9 ProcessPoolExecutor(N) \u6765\u4fee\u6539\n\u5904\u7406\u5668\u6570\u91cf\u3002\u8fd9\u4e2a\u5904\u7406\u6c60\u4f1a\u4e00\u76f4\u8fd0\u884c\u5230with\u5757\u4e2d\u6700\u540e\u4e00\u4e2a\u8bed\u53e5\u6267\u884c\u5b8c\u6210\uff0c\n\u7136\u540e\u5904\u7406\u6c60\u88ab\u5173\u95ed\u3002\u4e0d\u8fc7\uff0c\u7a0b\u5e8f\u4f1a\u4e00\u76f4\u7b49\u5f85\u76f4\u5230\u6240\u6709\u63d0\u4ea4\u7684\u5de5\u4f5c\u88ab\u5904\u7406\u5b8c\u6210\u3002"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u88ab\u63d0\u4ea4\u5230\u6c60\u4e2d\u7684\u5de5\u4f5c\u5fc5\u987b\u88ab\u5b9a\u4e49\u4e3a\u4e00\u4e2a\u51fd\u6570\u3002\u6709\u4e24\u79cd\u65b9\u6cd5\u53bb\u63d0\u4ea4\u3002\n\u5982\u679c\u4f60\u60f3\u8ba9\u4e00\u4e2a\u5217\u8868\u63a8\u5bfc\u6216\u4e00\u4e2a map() \u64cd\u4f5c\u5e76\u884c\u6267\u884c\u7684\u8bdd\uff0c\u53ef\u4f7f\u7528 pool.map() :"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# A function that performs a lot of work\ndef work(x):\n    ...\n    return result\n\n# Nonparallel code\nresults = map(work, data)\n\n# Parallel implementation\nwith ProcessPoolExecutor() as pool:\n    results = pool.map(work, data)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u53e6\u5916\uff0c\u4f60\u53ef\u4ee5\u4f7f\u7528 pool.submit() \u6765\u624b\u52a8\u7684\u63d0\u4ea4\u5355\u4e2a\u4efb\u52a1\uff1a"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Some function\ndef work(x):\n    ...\n    return result\n\nwith ProcessPoolExecutor() as pool:\n    ...\n    # Example of submitting work to the pool\n    future_result = pool.submit(work, arg)\n\n    # Obtaining the result (blocks until done)\n    r = future_result.result()\n    ..."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u5982\u679c\u4f60\u624b\u52a8\u63d0\u4ea4\u4e00\u4e2a\u4efb\u52a1\uff0c\u7ed3\u679c\u662f\u4e00\u4e2a Future \u5b9e\u4f8b\u3002\n\u8981\u83b7\u53d6\u6700\u7ec8\u7ed3\u679c\uff0c\u4f60\u9700\u8981\u8c03\u7528\u5b83\u7684 result() \u65b9\u6cd5\u3002\n\u5b83\u4f1a\u963b\u585e\u8fdb\u7a0b\u76f4\u5230\u7ed3\u679c\u88ab\u8fd4\u56de\u6765\u3002"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u5982\u679c\u4e0d\u60f3\u963b\u585e\uff0c\u4f60\u8fd8\u53ef\u4ee5\u4f7f\u7528\u4e00\u4e2a\u56de\u8c03\u51fd\u6570\uff0c\u4f8b\u5982\uff1a"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "def when_done(r):\n    print('Got:', r.result())\n\nwith ProcessPoolExecutor() as pool:\n     future_result = pool.submit(work, arg)\n     future_result.add_done_callback(when_done)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u56de\u8c03\u51fd\u6570\u63a5\u53d7\u4e00\u4e2a Future \u5b9e\u4f8b\uff0c\u88ab\u7528\u6765\u83b7\u53d6\u6700\u7ec8\u7684\u7ed3\u679c\uff08\u6bd4\u5982\u901a\u8fc7\u8c03\u7528\u5b83\u7684result()\u65b9\u6cd5\uff09\u3002\n\u5c3d\u7ba1\u5904\u7406\u6c60\u5f88\u5bb9\u6613\u4f7f\u7528\uff0c\u5728\u8bbe\u8ba1\u5927\u7a0b\u5e8f\u7684\u65f6\u5019\u8fd8\u662f\u6709\u5f88\u591a\u9700\u8981\u6ce8\u610f\u7684\u5730\u65b9\uff0c\u5982\u4e0b\u51e0\u70b9\uff1a"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u4e00\u65e6\u542f\u52a8\u4f60\u4e0d\u80fd\u63a7\u5236\u5b50\u8fdb\u7a0b\u7684\u4efb\u4f55\u884c\u4e3a\uff0c\u56e0\u6b64\u6700\u597d\u4fdd\u6301\u7b80\u5355\u548c\u7eaf\u6d01\u2014\u2014\u51fd\u6570\u4e0d\u8981\u53bb\u4fee\u6539\u73af\u5883\u3002"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u5b83\u4f1a\u514b\u9686Python\u89e3\u91ca\u5668\uff0c\u5305\u62ecfork\u65f6\u7684\u6240\u6709\u7a0b\u5e8f\u72b6\u6001\u3002\n\u800c\u5728Windows\u4e0a\uff0c\u514b\u9686\u89e3\u91ca\u5668\u65f6\u4e0d\u4f1a\u514b\u9686\u72b6\u6001\u3002\n\u5b9e\u9645\u7684fork\u64cd\u4f5c\u4f1a\u5728\u7b2c\u4e00\u6b21\u8c03\u7528 pool.map() \u6216 pool.submit() \u540e\u53d1\u751f\u3002"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u4f60\u5e94\u8be5\u5728\u521b\u5efa\u4efb\u4f55\u7ebf\u7a0b\u4e4b\u524d\u5148\u521b\u5efa\u5e76\u6fc0\u6d3b\u8fdb\u7a0b\u6c60\uff08\u6bd4\u5982\u5728\u7a0b\u5e8f\u542f\u52a8\u7684main\u7ebf\u7a0b\u4e2d\u521b\u5efa\u8fdb\u7a0b\u6c60\uff09\u3002"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.1"
    },
    "toc": {
      "base_numbering": 1,
      "nav_menu": {},
      "number_sections": true,
      "sideBar": true,
      "skip_h1_title": true,
      "title_cell": "Table of Contents",
      "title_sidebar": "Contents",
      "toc_cell": false,
      "toc_position": {},
      "toc_section_display": true,
      "toc_window_display": true
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}